# Naive Bayes implementation in Python

# Importing library
import math
import random
import csv


# The categorical class names are changed to numeric data,
# e.g. yes and no are encoded as 0 and 1 (in order of first appearance)
def encode_class(mydata):
    """Replace the class label in the last column of each row with an integer.

    Codes are assigned in order of first appearance: the first distinct label
    seen becomes 0, the next one 1, and so on.

    Args:
        mydata: list of mutable rows (lists); the last element of each row is
            the class label. Labels must be hashable.

    Returns:
        The same ``mydata`` list, with every row modified in place.
    """
    codes = {}
    for row in mydata:
        # Each row is rewritten exactly once, so labels that are already
        # integers can never be confused with a freshly assigned code.
        row[-1] = codes.setdefault(row[-1], len(codes))
    return mydata


# Splitting the data
def splitting(mydata, ratio):
    """Randomly split the rows into a training set and a test set.

    Args:
        mydata: sequence of rows; it is not modified.
        ratio: fraction of the rows to place in the training set.

    Returns:
        A ``(train, test)`` tuple of lists. ``train`` holds
        ``int(len(mydata) * ratio)`` rows (capped at ``len(mydata)``) drawn
        at random without replacement; ``test`` holds the remaining rows in
        their original relative order.
    """
    # Cap the size so a ratio above 1 cannot ask for more rows than exist
    # (random.randrange(0) would raise ValueError once test is exhausted).
    train_num = min(int(len(mydata) * ratio), len(mydata))
    train = []
    # Initially the test set is a shallow copy of the whole dataset.
    test = list(mydata)
    while len(train) < train_num:
        # Move one randomly chosen row from the test set to the training set.
        index = random.randrange(len(test))
        train.append(test.pop(index))
    return train, test


# Group the data rows under each class (e.g. yes or no)
# in a dictionary, e.g. dict[yes] and dict[no]
def groupUnderClass(mydata):
    """Group rows by the class label stored in their last column.

    Args:
        mydata: iterable of rows; the last element of each row is the label.

    Returns:
        A dict mapping each label to the list of rows carrying that label.
        Rows keep their input order and are the same objects as in
        ``mydata`` (not copies).
    """
    groups = {}
    for row in mydata:
        groups.setdefault(row[-1], []).append(row)
    return groups


# Calculating Mean
def mean(numbers):
    """Return the arithmetic mean of ``numbers`` as a float.

    Args:
        numbers: a sized collection of numbers (list or tuple).

    Raises:
        ZeroDivisionError: if ``numbers`` is empty.
    """
    return sum(numbers) / float(len(numbers))

# Calculating Standard Deviation
def std_dev(numbers):
    """Return the sample standard deviation of ``numbers``.

    The sum of squared deviations from the mean is divided by ``n - 1``.

    Args:
        numbers: a sized collection of numbers; it is traversed more than
            once, so a one-shot iterator will not work.

    Raises:
        ZeroDivisionError: if ``numbers`` has fewer than two elements.
    """
    avg = mean(numbers)
    squared_deviations = sum(pow(x - avg, 2) for x in numbers)
    variance = squared_deviations / float(len(numbers) - 1)
    return math.sqrt(variance)
&nbsp;
def MeanAndStdDev(mydata):
    """Return ``(mean, std_dev)`` for every attribute column except the last.

    Args:
        mydata: list of equally long numeric rows whose last element is the
            class label.

    Returns:
        A list with one ``(mean, std_dev)`` tuple per attribute column, in
        column order. An empty ``mydata`` yields an empty list.
    """
    # zip(*rows) transposes rows into columns, e.g.
    # [[a, b, c], [m, n, o], [x, y, z]] -> (a, m, x), (b, n, y), (c, o, z)
    # so the mean of the 1st attribute is (a + m + x) / 3 and the mean of
    # the 2nd attribute is (b + n + y) / 3.
    columns = list(zip(*mydata))
    # The last column holds the class label, so it is skipped up front
    # instead of being summarised and then discarded.
    return [(mean(column), std_dev(column)) for column in columns[:-1]]

# Find Mean and Standard Deviation under each class
def MeanAndStdDevForClass(mydata):
    """Compute per-attribute ``(mean, std_dev)`` summaries for each class.

    Args:
        mydata: list of numeric rows whose last element is the class label.

    Returns:
        A dict mapping each class label to the list returned by
        ``MeanAndStdDev`` for the rows of that class.
    """
    grouped = groupUnderClass(mydata)
    return {
        classValue: MeanAndStdDev(instances)
        for classValue, instances in grouped.items()
    }


# Calculate Gaussian Probability Density Function
def calculateGaussianProbability(x, mean, stdev):
    """Evaluate the Gaussian probability density function at ``x``.

    Args:
        x: the value at which to evaluate the density.
        mean: mean of the distribution.
        stdev: standard deviation of the distribution.

    Returns:
        The density ``exp(-(x - mean)^2 / (2 * stdev^2)) / (sqrt(2*pi) * stdev)``.
        When ``stdev`` is 0 the distribution is treated as a point mass at
        ``mean``: the result is 1.0 if ``x == mean`` and 0.0 otherwise.
    """
    if stdev == 0:
        # A feature that is constant within a class has zero spread; the
        # formula below would divide by zero, so fall back to an exact match.
        return 1.0 if x == mean else 0.0
    expo = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * expo


# Calculate Class Probabilities
def calculateClassProbabilities(info, test):
    """Compute, per class, the product of Gaussian densities of one row.

    Args:
        info: dict mapping each class label to a list of ``(mean, std_dev)``
            tuples, one per attribute.
        test: a single row; ``test[i]`` is the value of attribute ``i``.
            Elements beyond the number of summaries are ignored.

    Returns:
        A dict mapping each class label to the product of
        ``calculateGaussianProbability`` over all attributes. No class prior
        is multiplied in, and a class with no summaries maps to 1.

    Raises:
        IndexError: if ``test`` has fewer elements than a class has summaries.
    """
    probabilities = {}
    for classValue, classSummaries in info.items():
        likelihood = 1
        # Local names avoid shadowing the module-level mean()/std_dev().
        for i, (attr_mean, attr_std) in enumerate(classSummaries):
            likelihood *= calculateGaussianProbability(test[i], attr_mean, attr_std)
        probabilities[classValue] = likelihood
    return probabilities


# Make prediction - the class with the highest probability is the prediction
def predict(info, test):
    """Return the class label with the highest probability for one row.

    Args:
        info: per-class attribute summaries, as accepted by
            ``calculateClassProbabilities``.
        test: a single row of attribute values.

    Returns:
        The label whose computed probability is largest. On a tie the label
        encountered first wins; ``None`` is returned when ``info`` is empty.
    """
    probabilities = calculateClassProbabilities(info, test)
    # max() keeps the first maximal key, matching a strict ">" scan.
    return max(probabilities, key=probabilities.get, default=None)


# Returns predictions for a set of examples
def getPredictions(info, test):
    """Predict a class label for every row in ``test``.

    Args:
        info: per-class attribute summaries, as accepted by ``predict``.
        test: sequence of rows.

    Returns:
        A list with one predicted label per row, in the same order.
    """
    return [predict(info, row) for row in test]

# Accuracy score
def accuracy_rate(test, predictions):
    """Return the percentage of rows whose label matches its prediction.

    Args:
        test: sequence of rows; the last element of each row is the true
            class label.
        predictions: sequence of predicted labels, aligned with ``test``.

    Returns:
        Accuracy as a float between 0.0 and 100.0. An empty ``test`` yields
        0.0 instead of dividing by zero.

    Raises:
        IndexError: if ``predictions`` is shorter than ``test``.
    """
    total = len(test)
    if total == 0:
        return 0.0
    correct = sum(1 for i, row in enumerate(test) if row[-1] == predictions[i])
    return (correct / float(total)) * 100.0
&nbsp;
&nbsp;
# driver code

# add the data path in your system
filename = r'E:\user\MACHINE LEARNING\machine learning algos\Naive bayes\filedata.csv'


# Load the file and store it in the mydata list.
# The with-block closes the file once it has been read; newline="" is what
# the csv module asks for when it is handed a file object. Blank lines in
# the file come back from csv.reader as empty rows and are skipped, since a
# row without a last column cannot carry a class label.
with open(filename, "rt", newline="") as csv_file:
    mydata = [row for row in csv.reader(csv_file) if row]
mydata = encode_class(mydata)
# Every field, including the encoded class label, becomes a float.
mydata = [[float(x) for x in row] for row in mydata]


# split ratio = 0.7
# 70% of data is training data and 30% is test data used for testing
ratio = 0.7
train_data, test_data = splitting(mydata, ratio)
print('Total number of examples are: ', len(mydata))
print('Out of these, training examples are: ', len(train_data))
print("Test examples are: ", len(test_data))

# prepare model
info = MeanAndStdDevForClass(train_data)

# test model
predictions = getPredictions(info, test_data)
accuracy = accuracy_rate(test_data, predictions)
print("Accuracy of your model is: ", accuracy)

Comments

Submit

how to make a class in python	ModuleNotFoundError: No module named pip._internal	httpie on windows
python read json file	pandas create new column	python write to file
what is the use of class in python	install opencv python	rename columns pandas
rename columns in python	pip upgrade command	how to upgrade pip
command to upgrade the PIP	download pip install	command to update pip
how to upgrade pip in cmd	how to check python version	python iterate dictionary key value
python install pip	windows python pip upgrade	python virtual environment
how to update pip in python	how to read a file in python	how to update pip python
how to replace nan with 0 in pandas	check package version python	streamlit ssl error
networkx remove nodes with degree	get wd in python	find text between two strings regex python
create zero array in python	upgrade python version mc	python get image dimensions
pandas drop empty columns	summation django queryset	oddlyspecific09123890183019283
convert a dictionary into dataframe python	write dataframe to csv python	pygame draw circle
check value vowel user input python	remove commas from string python	No module named sklearn.cross_validation
pandas version check in python	convert column string to int pandas	python italic
python - prime number generator	.annotate unique distinct	current datetime pandas

naive bayes implementation in python

Comments

0 Comments

More Questions

C

CSS

Objective-C

PHP

Java

C++

Javascript

Html

Python

SQL

Swift

Ruby

Go

R

TypeScript

Kotlin

Assembly

VBA

Scala

Rust

Elixir

Dart

Haskell

Perl

Fortran

Matlab

Scheme

Julia

Lua

Delphi

Pascal

Solidity

BASIC

ActionScript

Csharp